import re
import sys
import os
from MyHTMLParser import MyHTMLParser
import urllib2

# Debug aid: load a saved copy of the page from disk instead of fetching it.
#fi = open('goog_finance.htm', 'r')
#html_text = fi.read()
#fi.close()

# URL template for the Google Finance page of one stock. The two %s slots are
# filled with (exchange, symbol). Under %-formatting "%%" is a literal percent
# sign, so "%%3A" becomes "%3A", the URL-encoded ":" between the two values.
# NOTE(review): "fstype=ii" presumably selects the financial-statements view --
# confirm against the service.
fa_url_template = "http://www.google.com/finance?q=%s%%3A%s&fstype=ii"
#################
# Main Function #
#################
#html = response.read()
#f = open('goog_finance', 'w')
#f.write(html)
#f.close()
# The script takes exactly two arguments: the exchange code and the ticker
# symbol. Anything else is a usage error.
if len(sys.argv) != 3:
    sys.stderr.write("usage: %s EXCHANGE SYMBOL\n" % sys.argv[0])
    # The exit status stays 0, as it always was, so existing wrappers that
    # check it keep working. sys.exit() is used instead of the site-provided
    # exit(), which is not available when Python runs with -S.
    sys.exit(0)

exchange = sys.argv[1]
symbol = sys.argv[2]
# Prefer $HOME as before; fall back to expanduser() when it is unset or empty
# so the output path never starts with the literal string "None".
home = os.getenv("HOME") or os.path.expanduser("~")

# Download the page, always releasing the connection even if read() fails.
response = urllib2.urlopen(fa_url_template % (exchange, symbol))
try:
    html_text = response.read()
finally:
    response.close()

# Directory that receives one .html file per extracted section.
div_dir = "%s/stock_html/%s/%s" % (home, exchange, symbol)
if not os.path.exists(div_dir):
    os.makedirs(div_dir)

# Section names handed to the parser; every section it reports under one of
# these names is saved to its own .html file.
# NOTE(review): the prefixes presumably stand for income statement, balance
# sheet and cash flow, each in an interim and an annual variant -- confirm.
html_div_names = [
    'incinterimdiv',
    'incannualdiv',
    'balinterimdiv',
    'balannualdiv',
    'casinterimdiv',
    'casannualdiv',
]

# Remove every <script>...</script> element, including its content. DOTALL
# lets "." match newlines so multi-line scripts are removed as well. The flag
# is passed to re.compile() rather than written as an inline "(?s)" at the end
# of the pattern: a global inline flag that is not at the start of the
# expression is deprecated and is rejected by newer Python versions.
html_text = re.compile(r'<(script).*?</\1>', re.DOTALL).sub('', html_text)
# Remove <span ...> opening tags and </span> closing tags while keeping the
# text between them. The closing-tag pattern contains no ".", so it needs no
# DOTALL flag.
html_text = re.compile(r'<(span).*?>', re.DOTALL).sub('', html_text)
html_text = re.sub(r'</span>', '', html_text)
# Keep the line endings so that ''.join() below reproduces the text exactly.
html_lines = html_text.splitlines(True)
# Drop the first line and the last two lines.
# NOTE(review): the previous comment said "first line and last line", but the
# slice [1:-2] discards two trailing lines -- confirm which one is intended.
html_lines = html_lines[1:-2]
filtered_html_text = ''.join(html_lines)

# Run the project's MyHTMLParser over the cleaned-up page. The section names
# are registered through SetSectionName() before the text is fed in; the
# result is read afterwards from parser.divs as (name, start, end) tuples.
# NOTE(review): start/end are later used as indices into html_lines, so they
# presumably count lines of filtered_html_text -- confirm in MyHTMLParser.
parser = MyHTMLParser()
parser.SetSectionName(html_div_names)
parser.feed(filtered_html_text)

# Write each section reported by the parser to <div_dir>/<name>.html, skipping
# any name that is not in html_div_names. start/end are used as slice indices
# into html_lines.
wanted_div_names = set(html_div_names)
for (parser_div_name, start, end) in parser.divs:
    if parser_div_name not in wanted_div_names:
        continue
    # "with" closes the file even if writelines() raises.
    with open("%s/%s.html" % (div_dir, parser_div_name), 'w+') as f:
        f.writelines(html_lines[start:end])
